from urllib import request,error,parse
import re
import time
import os

# Daily crawler: fetch the CSDN home page, collect every quoted
# http://blog.csdn.net/ link found in it, and save each linked page into a
# folder named after the current date. Repeats once every 24 hours.

url = "https://www.csdn.net/"
headers = ("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36")

# Install a global opener so that both urlopen() and urlretrieve() send the
# browser-like User-Agent header defined above.
opener = request.build_opener()
opener.addheaders = [headers]
request.install_opener(opener)

# Root folder that receives one sub-folder per day.
SAVE_ROOT = "D:\\python\\pythonQuanZhanXueXi"

# Quoted blog links; the dots of the host name are escaped so they only
# match a literal ".".
LINK_PATTERN = re.compile(r'"(http://blog\.csdn\.net/.*?)"')

# Pause between two crawls, in seconds (one day).
CRAWL_INTERVAL_SECONDS = 3600 * 24


def day_folder_name(when):
    """Return the folder name for *when* as zero-padded ``YYYYMMDD``.

    *when* is a ``time.struct_time`` (or an equivalent 9-item tuple).
    Zero padding keeps names unambiguous: without it, January 11th and
    November 1st of the same year would both map to e.g. ``2017111``.
    """
    return time.strftime("%Y%m%d", when)


def extract_links(html):
    """Return all blog links found in *html*, in order of appearance."""
    return LINK_PATTERN.findall(html)


def report_error(err):
    """Print what is known about a failed request.

    HTTP errors carry ``code`` and URL errors carry ``reason``; any other
    exception (timeout, file-system error, ...) is printed as-is so that
    no failure is silently dropped.
    """
    has_detail = False
    if hasattr(err, "code"):
        print(err.code)
        has_detail = True
    if hasattr(err, "reason"):
        print(err.reason)
        has_detail = True
    if not has_detail:
        print(repr(err))


def crawl_once():
    """Download every blog link on the home page into today's folder.

    The day folder is created under ``SAVE_ROOT`` -- the same location the
    files are written to -- instead of the current working directory.
    A failure to create that folder propagates to the caller; download
    failures are reported and skipped.
    """
    target_dir = os.path.join(SAVE_ROOT, day_folder_name(time.localtime()))
    os.makedirs(target_dir, exist_ok=True)

    try:
        # The context manager closes the HTTP response once it is read.
        with request.urlopen(url, timeout=3) as response:
            page = response.read().decode("UTF-8", "ignore")
    except Exception as err:
        report_error(err)
        return

    # Files are numbered from 1, and the first link is included as well.
    for number, link in enumerate(extract_links(page), start=1):
        destination = os.path.join(target_dir, "blog_" + str(number) + ".html")
        try:
            request.urlretrieve(link, destination)
        except Exception as err:
            # Best effort: one broken link must not stop the others.
            report_error(err)


def main():
    """Run the crawler forever, once per ``CRAWL_INTERVAL_SECONDS``."""
    while True:
        crawl_once()
        time.sleep(CRAWL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()